# One-time installation of the e1071 package (libsvm-based SVM implementation).
install.packages("e1071")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/e1071_1.7-3.tgz'
Content type 'application/x-gzip' length 900756 bytes (879 KB)
==================================================
downloaded 879 KB
The downloaded binary packages are in
/var/folders/w8/ct2m1fx50sj6m94jlql506gc0000gp/T//RtmplFqbG3/downloaded_packages
# Attach e1071 for svm(), predict.svm(), plot.svm() and tune().
library(e1071)
Attaching package: ‘e1071’
The following object is masked _by_ ‘.GlobalEnv’:
sigmoid
library(MASS)
# Simulate two well-separated Gaussian classes in R^2: n/2 points around
# (-2,-2) labelled 1 and n/2 points around (2,2) labelled 2, each with
# identity covariance. Seed fixed for reproducibility.
n <- 100
set.seed(2018)
x <- rbind(mvrnorm(n/2, c(-2,-2), diag(2)), mvrnorm(n/2, c(2,2), diag(2)))
y <- as.factor(rep(c(1,2), each=n/2))
dat <- data.frame(x1 = x[,1], x2= x[,2], y)
# NOTE(review): x2 is drawn on the horizontal axis and x1 on the vertical,
# colored by class -- presumably intentional; confirm the orientation.
with(dat, plot(x2, x1, pch=18, col=y, asp=1))
# Inspect the first simulated points (all from the class-1 cluster).
head(x)
[,1] [,2]
[1,] -1.4125573 -2.4229840
[2,] -1.4725563 -3.5498782
[3,] -2.5960212 -2.0644293
[4,] -0.7286876 -1.7291186
[5,] -1.7987455 -0.2647163
[6,] -2.4556339 -2.2647112
# Sanity-check both ends of the assembled data frame.
head(dat)
tail(dat)
We now fit a support vector machine to the data simulated above.
# Fit a (nearly) maximal-margin SVM: linear kernel with a very large cost.
# BUG FIX: the fitting call was missing from this transcript (summary(svm1)
# referenced an undefined object); reconstructed from the Call: field
# printed in the summary output below.
svm1 <- svm(y ~ x1 + x2, data = dat, kernel = "linear", cost = 1000)
summary(svm1)
Call:
svm(formula = y ~ x1 + x2, data = dat, kernel = "linear", cost = 1000)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1000
Number of Support Vectors: 4
( 2 2 )
Number of Classes: 2
Levels:
1 2
# Coordinates of the support vectors (rows of x selected by svm1$index).
x[svm1$index,]
[,1] [,2]
[1,] -1.12392840 0.9559452
[2,] 0.70176410 -0.4910098
[3,] -0.10162977 0.2994029
[4,] -0.06858902 0.1619788
# Plot the fitted decision regions over the training data.
plot(svm1, dat)
# NOTE(review): this summary reports 2 support vectors, unlike the 4
# reported above -- svm1 was presumably refit between the two calls by a
# line not captured in this transcript; confirm.
summary(svm1)
Call:
svm(formula = y ~ x1 + x2, data = dat, kernel = "linear", cost = 1000)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1000
Number of Support Vectors: 2
( 1 1 )
Number of Classes: 2
Levels:
1 2
# Coordinates of the (two) support vectors of the refitted model.
x[svm1$index,]
[,1] [,2]
[1,] -1.0651731 0.0994707
[2,] -0.3689705 0.9629019
# Decision-region plot for the refitted model.
plot(svm1, dat)
Now assess out-of-sample performance on a newly generated test sample.
# Generate an independent test sample from the same two-Gaussian mixture.
xnew <- rbind(mvrnorm(n/2, c(-2,-2), diag(2)), mvrnorm(n/2, c(2,2), diag(2)))
ynew <- as.factor(rep(c(1,2), each=n/2))
# Robustness: predict() on a formula-fitted svm expects newdata with the
# formula's variable names (x1, x2); a bare unnamed matrix relies on
# accidental column matching, so wrap it in a named data frame.
ypred <- predict(svm1, data.frame(x1 = xnew[,1], x2 = xnew[,2]))
# Confusion matrix: predicted vs true labels.
table(ypred, ynew)
ynew
ypred 1 2
1 49 0
2 1 50
# Test misclassification rate.
sum(ynew != ypred)/n
[1] 0.01
Now consider the case where the variance is large, so the classes overlap and no separating hyperplane exists.
# Harder problem: same class means but covariance inflated to 3*I, so the
# clouds overlap and no separating hyperplane exists.
set.seed(2018)
x <- rbind(mvrnorm(n/2, c(-2,-2), 3*diag(2)),
           mvrnorm(n/2, c(2,2), 3*diag(2)))
y <- as.factor(rep(c(1,2), each = n/2))
dat <- data.frame(x1 = x[,1], x2 = x[,2], y)
with(dat, plot(x2, x1, pch = 18, col = y, asp = 1))
# An extremely large cost approximates a hard-margin SVM; 1e12 is the same
# value as 1000000000000 but far easier to read.
svm2 <- svm(y ~ x1 + x2, data = dat, kernel = "linear", cost = 1e12)
WARNING: reaching max number of iterations
# Note the solver warning above: with effectively hard-margin cost on
# non-separable data the optimizer hits its iteration limit.
summary(svm2)
Call:
svm(formula = y ~ x1 + x2, data = dat, kernel = "linear", cost = 1e+12)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1e+12
Number of Support Vectors: 11
( 5 6 )
Number of Classes: 2
Levels:
1 2
# Support-vector coordinates -- many more than in the separable case.
x[svm2$index,]
[,1] [,2]
[1,] -1.65141691 1.0055995
[2,] -0.89545438 -0.8965873
[3,] 0.36586275 -1.6458518
[4,] -6.49893758 0.4430950
[5,] -3.59851617 0.7802958
[6,] -1.94633386 0.7485015
[7,] 0.06828258 1.0112160
[8,] -0.16203453 -0.1150225
[9,] 0.24195013 -1.0598300
[10,] -2.10317722 0.2036933
[11,] 0.15982206 -1.4821284
# Decision-region plot for the overlapping-classes fit.
plot(svm2, dat)
# Fresh test sample from the same overlapping mixture.
xnew <- rbind(mvrnorm(n/2, c(-2,-2), 3*diag(2)), mvrnorm(n/2, c(2,2), 3*diag(2)))
ynew <- as.factor(rep(c(1,2), each=n/2))
# Robustness: supply newdata as a data frame named like the formula
# variables (x1, x2) so predict() matches columns by name rather than by
# accident of position in a bare matrix.
ypred <- predict(svm2, data.frame(x1 = xnew[,1], x2 = xnew[,2]))
table(ypred, ynew)
ynew
ypred 1 2
1 18 48
2 32 2
# Test error rate. NOTE(review): 0.8 means predictions are mostly the
# opposite class (see the confusion table above, where ypred=1 pairs with
# ynew=2 48 times) -- worth checking whether labels got flipped.
sum(ynew != ypred)/n
[1] 0.8
We can see that many more support vectors are needed here. As cost → ∞, the number of support vectors tends to the optimal (minimal) number of support vectors.
Kernel SVM
# Radial (RBF) kernel: sweep the cost parameter and report the test error
# for each setting.
for (cost_val in seq(1, 1000, by = 100)) {
  svm3 <- svm(y~x1+x2, data=dat, kernel = "radial", gamma = 1, cost = cost_val)
  plot(svm3, dat)
  # Named data frame so predict() matches newdata columns to the formula
  # variables by name.
  ypred <- predict(svm3, data.frame(x1 = xnew[,1], x2 = xnew[,2]))
  print(sum(ynew != ypred)/n)
}
[1] 0.1
[1] 0.1
[1] 0.11
[1] 0.11
[1] 0.11
[1] 0.11
[1] 0.1
[1] 0.1
[1] 0.1
[1] 0.11
As cost increases, the decision regions become more sharply defined (although the prediction error also increases slightly).
# Sweep gamma at fixed high cost; larger gamma gives a more localized
# (wigglier) decision boundary.
for (gamma_val in seq(1, 100, by=10)) {
  svm3 <- svm(y~x1+x2, data=dat, kernel = "radial", gamma = gamma_val, cost = 1000)
  plot(svm3, dat)
  # Named data frame so predict() matches newdata columns to the formula
  # variables by name.
  ypred <- predict(svm3, data.frame(x1 = xnew[,1], x2 = xnew[,2]))
  print(sum(ynew != ypred)/n)
}
[1] 0.11
[1] 0.13
[1] 0.14
[1] 0.17
[1] 0.21
[1] 0.22
[1] 0.23
[1] 0.23
[1] 0.24
[1] 0.25
The same holds for gamma: increasing either parameter too far leads to overfitting.
Now use tuning to select optimal params.
# 10-fold CV grid search over cost and gamma, each ranging over 10^{-3..3}.
tune.out <- tune(svm, y~x1+x2, data=dat, kernel="radial",
ranges = list(cost=10^c((-3):3), gamma=10^c((-3):3)))
summary(tune.out)
Parameter tuning of ‘svm’:
- sampling method: 10-fold cross validation
- best parameters:
- best performance: 0.07
- Detailed performance results:
NA
Fit and check pred error.
# Refit at gamma = 0.001, cost = 100 -- presumably the best CV parameters
# (the summary output above printed them blank); confirm against
# tune.out$best.parameters.
svm3 <- svm(y~x1+x2, data=dat, kernel = "radial", gamma = 0.001, cost = 100)
plot(svm3, dat)
ypred <- predict(svm3, xnew)
print(sum(ynew != ypred)/n)
MNIST SVM
# MNIST: download the digit data, drop uninformative pixel columns, and
# split into training and test sets.
filePath <- "https://raw.githubusercontent.com/AJCoca/SLP19/master/"
# BUG FIX: was assigned to 'fieName' (typo) but read back as 'fileName'
# in the read.csv() call, which would fail with an undefined variable.
fileName <- "mnist.csv"
mnist <- read.csv(paste0(filePath, fileName), header = TRUE)
mnist$digit <- as.factor(mnist$digit)  # response as a factor for classification
train <- mnist[1:4000,]
# Pixels constant across all training rows carry no information and break
# scaling inside svm(); flag and drop them. Renamed from 'identical' to
# avoid shadowing base::identical().
constant_px <- apply(train, 2, function(v){all(v==v[1])})
train <- train[,!constant_px] # remove redundant pixels
test <- mnist[4001:6000,!constant_px]
# Tune cost and gamma for the digit classifier by 10-fold CV.
# NOTE(review): the warning below reports pixel columns that are constant
# and cannot be scaled -- presumably columns that become constant within
# individual CV folds even after filtering on the full training set;
# likely harmless, but confirm.
tune.out <- tune(svm, digit~., data=train, kernel="radial",
ranges = list(cost=10^c((-3):3), gamma=10^c((-3):3)))
Variable(s) ‘pixel.51’ and ‘pixel.143’ and ‘pixel.363’ and ‘pixel.391’ and ‘pixel.671’ constant. Cannot scale data.
Carry out SVM tuning to get optimal cost & gamma params.
# Re-run the cost/gamma grid search (this duplicates the identical tune()
# call issued earlier in the transcript) and inspect the CV results.
tune.out <- tune(svm, digit~., data=train, kernel="radial",
ranges = list(cost=10^c((-3):3), gamma=10^c((-3):3)))
summary(tune.out)
# Two-sided normal p-value for z = 0.472/0.0676 (about 6.98); the
# lower.tail = FALSE form avoids catastrophic cancellation in 1 - pnorm(z)
# only notationally here -- it evaluates to the same number.
2 * pnorm(0.472 / 0.0676, lower.tail = FALSE)
[1] 2.90501e-12